Importing all the libraries required

In [24]:
import fastlmm
import h5py
import numpy as np
import time
# set degree of verbosity (adapt to INFO for more verbose output)
import logging
logging.basicConfig(level=logging.WARNING)

# set figure sizes
import pylab
pylab.rcParams['figure.figsize'] = (10.0, 8.0)

# set display width for pandas data frames
import pandas as pd
pd.set_option('display.width', 1000)

# import the algorithm
from fastlmm.association import single_snp
from pysnptools.snpreader import Pheno, Bed, SnpData
import fastlmm.util.util as flutil
from pysnptools.snpreader import SnpHdf5 
from fastlmm.association import single_snp_linreg
#QQ-Plots
from fastlmm.util.stats import plotp
In [3]:
bed_fn = "imputed_snps_binary.hdf5"
pheno_fn = "phenotype.txt"

#Loading the SNP data
f = h5py.File("1001_SNP_MATRIX/imputed_snps_binary.hdf5","r")

#Retrieving the data from the hdf5 snp file.
snps = f["snps"]
accessions = f["accessions"]
positions = f["positions"]
In [4]:
# Details of chromosomes
chr_regions = positions.attrs["chr_regions"]
indices_for_chr1 = chr_regions[0]
print(indices_for_chr1)
[      0 2597825]
In [5]:
#Accessions which are available in 1001genomes org with their respective ids
df = pd.read_csv("AvailableMappedAccessions.csv")

#indices list of accessions that will be used for GWAS
accessions = accessions[:]
accs = accessions.astype(np.int64)
indices_of_accessions = [ np.where(accs == df["Accession Id"][i])[0][0] for i in range(len(df))]
indices_of_accessions = sorted(indices_of_accessions)
print (indices_of_accessions, len(indices_of_accessions))
([174, 313, 322, 325, 330, 331, 336, 337, 339, 351, 352, 357, 360, 363, 365, 372, 374, 383, 394, 418, 423, 464, 472, 473, 484, 502, 543, 546, 551, 560, 562, 564, 568, 579, 1049], 35)
In [6]:
#Snp data for creating SNP data matrix
#We need iid, sid, and vals
#iid is required to create SNP Data

iid = [[i+1, sorted(df["Accession Name"])[i]] for i in range(len(df))]
print ("iids : ", iid)

genes = snps[:, indices_of_accessions]
#Filtering genes for chromosome 1
genes = genes[: indices_for_chr1[-1]]
threshold= 5

genes = np.transpose(genes)
alleles = np.where(genes.sum(axis=0) >threshold)
snps = genes[:,alleles[0]]
print ("snps shape", snps.shape)
('iids : ', [[1, 'Aa-0'], [2, 'Bla-1'], [3, 'Bs-1'], [4, 'Bu-0'], [5, 'Co'], [6, 'Col-0'], [7, 'Eds-1'], [8, 'Ei-2'], [9, 'Fei-0'], [10, 'Gu-0'], [11, 'H55'], [12, 'HR-10'], [13, 'Hs-0'], [14, 'Is-0'], [15, 'Jm-0'], [16, 'Kz-13'], [17, 'Kz-9'], [18, 'Ler-1'], [19, 'Lu-1'], [20, 'Mir-0'], [21, 'Ms-0'], [22, 'PHW-2'], [23, 'Pla-0'], [24, 'San-2'], [25, 'Sf-2'], [26, 'Sorbo'], [27, 'Sq-1'], [28, 'Stw-0'], [29, 'Ta-0'], [30, 'Ts-5'], [31, 'Ull1-1'], [32, 'Uod-1'], [33, 'Utrecht'], [34, 'Ws-2'], [35, 'Zdr-1']])
('snps shape', (35, 242403))
In [7]:
number_of_genes_discarded = genes.shape[1] - snps.shape[1]
print ("number of genes discarded", number_of_genes_discarded)
('number of genes discarded', 2355422)
In [13]:
#sid is required to create SNP data
sid = range(snps.shape[1])
print(sid[:5])
[0, 1, 2, 3, 4]
In [14]:
#it's important create snps positions
pos = []
for i in sid:
    pos.append([1,i, i])
pos = np.array(pos) #Converting into a numpy array
In [11]:
snps = snps.astype(np.float64)
snps.dtype
Out[11]:
dtype('float64')
In [15]:
#Creating SNP Data
snp_data = SnpData(iid =  iid, sid = sid,
                                val = snps, pos = pos)
In [16]:
snp_data.sid_count
Out[16]:
242403
In [17]:
snp_data.read().val[0]
Out[17]:
array([0., 1., 1., ..., 1., 0., 1.])
In [18]:
#Verifying Phenotypes
# pheno_vals = pheno_fn.read().val
# print(pheno_vals)
In [26]:
#Working with Phenotypes for features 1-64
phenotype_file_name = "PhenotypesAutoencoder/phenotypesFeature"
suffix = ".txt"

def gwasAnalysisWithManhattanPlot(pheno_fn):
    t0 = time.time()
    results_dataframe_feature_ = single_snp(test_snps=snp_data, pheno=pheno_fn, count_A1=False)
    t1 = time.time()

    print ("Time taken for analysis =", str(t1-t0),"seconds")
    print("\nManhattan Plot")
    flutil.manhattan_plot(results_dataframe_feature_[["Chr", "ChrPos", "PValue"]],pvalue_line=1e-5,xaxis_unit_bp=False)
    pylab.show()
    print("\nQQ Plot")
    plotp.qqplot(results_dataframe_feature_["PValue"].values, xlim=[0,5], ylim=[0,5])
    pylab.show()
    
for feature in range(1,65):
    pheno_fn = phenotype_file_name + str(feature) + suffix
#     print(pheno_fn)
    print("Single SNP GWAS on" +phenotype_file_name+str(feature))
    gwasAnalysisWithManhattanPlot(pheno_fn)
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature1
('Time taken for analysis =', '61.1611728668', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.2674
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature2
('Time taken for analysis =', '61.4130549431', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.8852
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature3
('Time taken for analysis =', '60.7037088871', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.8703
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature4
('Time taken for analysis =', '61.2864890099', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.9137
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature5
('Time taken for analysis =', '62.0415329933', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.9908
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature6
('Time taken for analysis =', '61.2780070305', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.8193
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature7
('Time taken for analysis =', '61.9314210415', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.1828
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature8
('Time taken for analysis =', '61.3694491386', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.3121
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature9
('Time taken for analysis =', '61.6268930435', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.9770
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature10
('Time taken for analysis =', '61.3853809834', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.3733
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature11
('Time taken for analysis =', '61.4762909412', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.8163
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature12
('Time taken for analysis =', '61.1031610966', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.0971
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature13
('Time taken for analysis =', '61.1428029537', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.1893
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature14
('Time taken for analysis =', '61.345717907', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.0819
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature15
('Time taken for analysis =', '61.4951028824', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.7916
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature16
('Time taken for analysis =', '62.0290560722', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.9280
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature17
('Time taken for analysis =', '61.2937939167', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.0888
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature18
('Time taken for analysis =', '60.7088549137', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.3671
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature19
('Time taken for analysis =', '61.7628588676', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.8684
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature20
('Time taken for analysis =', '61.6849379539', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.0838
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature21
('Time taken for analysis =', '61.610517025', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.2446
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature22
('Time taken for analysis =', '66.3458988667', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.1198
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature23
('Time taken for analysis =', '68.9998700619', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.2468
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature24
('Time taken for analysis =', '66.2439370155', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.2750
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature25
('Time taken for analysis =', '67.462072134', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.2099
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature26
('Time taken for analysis =', '65.9550271034', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.2223
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature27
('Time taken for analysis =', '61.5741920471', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.2478
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature28
('Time taken for analysis =', '61.7201678753', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.7812
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature29
('Time taken for analysis =', '62.1834321022', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.0476
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature30
('Time taken for analysis =', '62.9097969532', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.7711
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature31
('Time taken for analysis =', '63.0232949257', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.1894
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature32
('Time taken for analysis =', '63.7493841648', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.8820
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature33
('Time taken for analysis =', '61.7286329269', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.3491
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature34
('Time taken for analysis =', '61.6869289875', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.2427
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature35
('Time taken for analysis =', '61.6039690971', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.7617
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature36
('Time taken for analysis =', '61.5895659924', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.9791
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature37
('Time taken for analysis =', '61.5372560024', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.6566
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature38
('Time taken for analysis =', '61.4146080017', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.7631
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature39
('Time taken for analysis =', '61.2882819176', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.9628
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature40
('Time taken for analysis =', '61.681363821', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.9546
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature41
('Time taken for analysis =', '62.2764520645', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.8455
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature42
('Time taken for analysis =', '61.9081530571', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.9575
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature43
('Time taken for analysis =', '62.0111010075', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.9368
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature44
('Time taken for analysis =', '61.3498780727', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.3715
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature45
('Time taken for analysis =', '61.7899599075', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.9503
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature46
('Time taken for analysis =', '61.9056539536', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.5182
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature47
('Time taken for analysis =', '61.2660720348', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.5267
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature48
('Time taken for analysis =', '61.667896986', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.1000
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature49
('Time taken for analysis =', '60.7015969753', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.9356
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature50
('Time taken for analysis =', '61.6124179363', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.5257
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature51
('Time taken for analysis =', '61.767802', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.0644
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature52
('Time taken for analysis =', '61.4799740314', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.7602
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature53
('Time taken for analysis =', '61.3336191177', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.0869
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature54
('Time taken for analysis =', '62.066380024', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.2063
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature55
('Time taken for analysis =', '61.775965929', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.9511
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature56
('Time taken for analysis =', '61.9531910419', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.1232
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature57
('Time taken for analysis =', '61.343775034', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.1530
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature58
('Time taken for analysis =', '61.5187950134', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.1759
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature59
('Time taken for analysis =', '60.4185161591', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.1452
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature60
('Time taken for analysis =', '60.5353519917', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.8365
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature61
('Time taken for analysis =', '60.3178639412', 'seconds')

Manhattan Plot
QQ Plot
lambda=1.0427
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature62
('Time taken for analysis =', '60.2812771797', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.6709
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature63
('Time taken for analysis =', '60.4729850292', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.8283
Single SNP GWAS onPhenotypesAutoencoder/phenotypesFeature64
('Time taken for analysis =', '60.0952529907', 'seconds')

Manhattan Plot
QQ Plot
lambda=0.9807